/*Creating bank-level data set*/

local Data "BankLevelAnalysis"

noi disp "Obtaining bank financial data for bank-level regressions..."
noi disp "Verifying that `Data'.dta does not already exist..."
* This returns an error if the file does not exist.
capture confirm file "`Data'.dta"

* If an error is returned, then the commands below are run.
if _rc == 601 {
	noi disp "Data not found.  Building data..."
	noi disp "Building `Data'.dta..."


noi disp "Loading RIS.dta..."
use "$SourceData\RIS.dta", replace
noi disp "Finished loading RIS.dta."

gen temp = cpi if year==2001
	egen base = max(temp)
	drop temp

	gen cpi_adj = cpi/base
	gen asset2 = asset/cpi_adj
	gen small = 1 if asset2<=1000000


	sort cert year
	xtset cert year
	
		gen Lasset = L.asset
		gen Lliab  = L.liab
		gen Leq  = L.eq
	

	gen Lsmall=L.small

	preserve
		noi disp "Loading merger_updated.csv..."	
		insheet using "$SourceData\mergers.csv", clear
		sort cert year
		by cert year: gen merger = _n
		keep if merger==1
		keep merger cert year
		noi disp "Saving merged_certs.dta..."
		save "$LocalData\merged_certs.dta", replace
	restore

*Generate Bank Portfolio

	sort cert year
	merge 1:1 cert year using "$LocalData\merged_certs.dta"
	drop _merge
	replace merger = 0 if missing(merger)
	*drop if merger==1

*Merging in Bank County Deposit Weights
	merge 1:m cert year using "$LocalData\Bank_Dep_Wghts.dta"

	drop _merge
	rename stcntybr geography
	sort geography year

*Combining with other data	
	merge m:1 geography year using "$LocalData\BartikAnalyticData_2digit.dta"
	
	gen missing_county = 1 if missing(c_i_share_200011) & year>=2002 & year<=2017
	bysort cert: egen missing_bank_county = max(missing_county)
	drop if missing_bank_county==1
	
	keep if _merge==3
	egen cert_geo = group(cert geography)
	sort cert_geo year
	xtset cert_geo year
	gen Lbank_county_wght = L.bank_county_wght
	
	sort cert year geography cert_geo
	order cert year geography cert_geo bartik*
				
foreach span in 1 5 {
	bysort cert year: asgen bank_bartikGL`span' = bartikGL`span', weight(Lbank_county_wght)
	bysort cert year: asgen bank_bartikGS`span' = bartikGS`span', weight(Lbank_county_wght)
	bysort cert year: asgen bank_gSmallFirm`span' = gSmallFirm`span', weight(Lbank_county_wght)
	bysort cert year: asgen bank_gLargeFirm`span' = gLargeFirm`span', weight(Lbank_county_wght)	
	}
	
	
	bysort cert year: asgen bank_dlngdp = dlngdp, weight(Lbank_county_wght)
	bysort cert year: asgen bank_dlnpop = pop_growth, weight(Lbank_county_wght)
	bysort cert year: asgen bank_dhpi   = annualchange, weight(Lbank_county_wght)
	bysort cert year: asgen bank_dunemp = unem_change, weight(Lbank_county_wght)
	
						*********TEST by running this piece over and over*
							preserve	
								bysort cert year: gen obs2 = _n
								
								sort cert year obs2 geography
								order cert year geography obs2 bartik*
								
					*			noi sum bank_gSmallFirm1 bank_bartikGS1
								
								keep if obs2==1

								sort cert year
								xtset cert year
						
								noi sum bank_gSmallFirm1 bank_bartikGS1
							restore
						**********************************************
			
			bysort cert year: gen obs2 = _n
			
			sort cert year obs2 geography
			order cert year geography obs2 bartik*
				
*			noi sum bank_gSmallFirm1 bank_bartikGS1
				
			keep if obs2==1
			sort cert year
			xtset cert year
		
			noi sum bank_gSmallFirm1 bank_bartikGS1			
			
	
	gen bank_lnci = ln(lnci+1) - ln(L.lnci+1)	
	gen bank_lndep = ln(bank_dep+1) - ln(L.bank_dep+1)
	gen bank_lnci4 = ln(lnci4 + 1) - ln(L.lnci4+1)
	gen bank_lnre = ln(lnre + 1) - ln(L.lnre+1)
	gen bank_lnreres = ln(lnreres + 1) - ln(L.lnreres+1)
*	gen bank_prov = 2*elnatq_sum4/(lnls+L.lnls)
	gen bank_prov = ln(elnatq_sum4 + 1 ) - ln(L.elnatq_sum4 + 1 )
	gen roa = 2*netincq_sum4/(asset+L.asset)
	
	gen bank_lndep5 = ln(bank_dep+1) - ln(L5.bank_dep+1)
	gen bank_lnci5 = ln(lnci+1) - ln(L5.lnci+1)
	gen bank_lnci45 = ln(lnci4+1) - ln(L5.lnci4+1)
	gen bank_lnre45 = ln(lnre+1) - ln(L5.lnre+1)

	gen Llarge = cond(L.asset>10000000,1,0)
	
	egen ntrind = rowtotal(ntrindmm ntrinds)
	*gen ntrbus = rconb549 - rconp753 - rconp754
	egen ntrbus = rowtotal(ntripcmo ntripcso)
	egen trind  = rowtotal(trnindni trnindi)
	gen busdep 	= ntrbus + rconb549 - rconp753 - rconp754
	gen hhdep	= trind + ntrind
	gen bank_busdep = ln(busdep+1) - ln(L.busdep+1)
	gen bank_hhdep = ln(hhdep+1) - ln(L.hhdep+1)
	gen bank_hhbus = ln(hhdep+busdep) - ln(L.hhdep + L.busdep)
	
************** Quantile Analysis **************

	* Small business lending
	gen sbl = (lnci4+lnag4)/asset*100
	format sbl %9.2f
	label var sbl "Small Business Lending"
	
* Small Business Lending Quantiles
* Deciles of Small Business Loans to Assets Ratio
	egen SBquant=xtile(sbl), n(3) by(year)
	gen temp = SBquant if year==2002
	bysort cert: egen SBquant2002 = max(temp)

	drop temp 
	label var SBquant "Small Business Lending Quantile"
	order SBquant sbl
	
	gen temp = sbl if year==2002
	bysort cert: egen sbl2002 = max(temp)
	drop temp

***********************************************

* Drop mergers
	drop if merger == 1

	drop _merge
	noi sum sbl bank_lndep bank_gSmallFirm1 bank_bartikGS1

	noi disp "Saving `Data'.dta"
	compress
	save "$LocalData\\`Data'.dta", replace
	save "$LocalData\Archive\\`Data'`CurrentDate'.dta", replace
}

	
	
*For Bank Level Analysis using later data based on deposit data availability
	

****Using industry shares from 2013 for clustering for the 2014-2017 sample using bank deposit data
use "$LocalData\BankLevelAnalysis.dta", clear
keep cert year geography Lbank_county_wght
keep if year==2013
preserve
	insheet using "$SourceData\3_digit_q2.csv", clear
	keep if year==2013

	*Excluding Finance Industry and NAICS92, the latter because there is no data for large v. small
	drop if floor(industry/10)==52 | floor(industry/10)==92

	* Verify that there are no duplicates
	isid geography industry year 
	bysort geography: egen totalemp = sum(emp)
	replace emp = emp/totalemp
	* Keep only if the first year observed for each county is 2000.
	keep emp geography industry
	sort geography 
	save "$LocalData\BankClusterTemp2013.dta", replace
	
restore

preserve
	sort geography
	joinby geography using "$LocalData\BankClusterTemp2013.dta"

	gen bank_county_ind_wghts = Lbank_county_wght*emp
	sort cert industry
	collapse (sum) bank_county_ind_wghts, by (cert year industry)

		bysort cert: egen rank = rank(-bank_county_ind_wghts)
		keep if rank<2

		*Breaking ties
			set seed 98034
			generate u1 = runiform()
			bysort cert: egen rank2 = rank(u1)
			keep if rank2==1
		
		*Set Groups for clustering by largest 3-digit NAICS code
		
		egen cluster3 = group(industry)

		keep cert cluster3
	save "$LocalData\BankCluster3_2013.dta", replace
restore	


use "$LocalData\BankLevelAnalysis.dta", clear
keep cert year geography Lbank_county_wght
keep if year==2002

*For Bank Level Analysis
preserve
	insheet using "$SourceData\3_digit_q2.csv", clear
	keep if year==2000

	*Excluding Finance Industry and NAICS92, the latter because there is no data for large v. small
	drop if floor(industry/10)==52 | floor(industry/10)==92

	* Verify that there are no duplicates
	isid geography industry year 
	bysort geography: egen totalemp = sum(emp)
	replace emp = emp/totalemp
	* Keep only if the first year observed for each county is 2000.
	keep emp geography industry
	sort geography 
	keep if emp!=.
	save "$LocalData\BankClusterTemp2000.dta", replace
	
restore

preserve
	keep if year==2002

	sort geography
	joinby geography using "$LocalData\BankClusterTemp2000.dta"

	gen bank_county_ind_wghts = Lbank_county_wght*emp
	sort cert industry
	collapse (sum) bank_county_ind_wghts, by (cert year industry)

		bysort cert: egen rank = rank(-bank_county_ind_wghts)
		keep if rank<2

		*Breaking ties
			set seed 98034
			generate u1 = runiform()
			bysort cert: egen rank2 = rank(u1)
			keep if rank2==1
		
		*Set Groups for clustering by largest 3-digit NAICS code
		
		egen cluster3 = group(industry)
		keep cert cluster3
	save "$LocalData\BankCluster3_2002.dta", replace
restore		
	
	
* This runs if no error was returned.
else noi disp "Data already exists."
noi etime
noi disp " "



